In [1]:
import pandas as pd
import numpy as np
import seaborn as sbn
import altair as alt
import scipy.stats as stats
import matplotlib.pyplot as plt
In [2]:
file = "/Users/anair/Downloads/cleaned_players.csv"
df = pd.read_csv(file)
In [3]:
df.columns
Out[3]:
Index(['first_name', 'second_name', 'goals_scored', 'assists', 'total_points',
'minutes', 'goals_conceded', 'creativity', 'influence', 'threat',
'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards',
'yellow_cards', 'selected_by_percent', 'now_cost', 'element_type'],
dtype='object')
In [4]:
df.sample(20)
Out[4]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 287 | Matt | Turner | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 7.2 | 39 | GK |
| 723 | Kevin | Danso | 0 | 0 | 16 | 842 | 18 | 34.4 | 242.0 | 72.0 | 0 | 104 | 34.9 | 1 | 0 | 0 | 0.0 | 44 | DEF |
| 123 | Ethan | Brierley | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.1 | 44 | MID |
| 250 | Caleb | Wiley | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 40 | DEF |
| 139 | Myles | Peart-Harris | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 45 | MID |
| 603 | Willy | Boly | 0 | 0 | 6 | 146 | 1 | 0.4 | 51.0 | 17.0 | 0 | 26 | 6.8 | 0 | 0 | 1 | 0.1 | 42 | DEF |
| 587 | John | Ruddy | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.3 | 39 | GK |
| 466 | James | McConnell | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.1 | 43 | MID |
| 314 | Iliman | Ndiaye | 9 | 0 | 114 | 2426 | 24 | 326.4 | 598.0 | 493.0 | 21 | 552 | 142.0 | 15 | 0 | 3 | 1.1 | 52 | FWD |
| 502 | Rúben | Gato Alves Dias | 0 | 0 | 90 | 2269 | 21 | 173.8 | 478.2 | 150.0 | 7 | 410 | 80.2 | 11 | 0 | 4 | 5.5 | 55 | DEF |
| 344 | Luke | Harris | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 45 | MID |
| 558 | Chido | Obi-Martin | 0 | 0 | 7 | 160 | 5 | 5.1 | 8.2 | 46.0 | 0 | 21 | 5.4 | 0 | 0 | 1 | 0.2 | 45 | FWD |
| 257 | Mathis | Amougou | 0 | 0 | 1 | 7 | 0 | 0.9 | 1.0 | 0.0 | 0 | 2 | 0.2 | 0 | 0 | 0 | 0.0 | 45 | MID |
| 173 | Ibrahim | Osman | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 50 | MID |
| 742 | Konstantinos | Mavropanos | 0 | 0 | 45 | 2035 | 39 | 74.9 | 497.0 | 160.0 | 4 | 234 | 73.2 | 2 | 1 | 4 | 0.1 | 43 | DEF |
| 12 | Declan | Rice | 4 | 7 | 127 | 2823 | 29 | 959.6 | 733.0 | 399.0 | 17 | 627 | 208.8 | 11 | 1 | 5 | 4.2 | 63 | MID |
| 616 | Omar | Richards | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 45 | DEF |
| 251 | Filip | Jørgensen | 0 | 0 | 18 | 540 | 9 | 0.0 | 157.6 | 0.0 | 0 | 59 | 15.9 | 1 | 0 | 0 | 0.4 | 42 | GK |
| 37 | Leon | Bailey | 1 | 2 | 47 | 1132 | 19 | 235.8 | 146.4 | 325.0 | 2 | 174 | 70.3 | 2 | 0 | 3 | 0.8 | 62 | MID |
| 523 | Bruno | Borges Fernandes | 8 | 12 | 174 | 3017 | 47 | 1407.7 | 1017.8 | 587.0 | 28 | 810 | 300.8 | 10 | 2 | 3 | 13.7 | 84 | MID |
In [5]:
df["Full_Name"] = df.apply(lambda row: row['first_name'] + ' ' + row['second_name'], axis = 1)
df["Full_Name"]
Out[5]:
0 Fábio Ferreira Vieira
1 Gabriel Fernando de Jesus
2 Gabriel dos Santos Magalhães
3 Kai Havertz
4 Karl Hein
...
799 Emmanuel Agbadou
800 Vítor Manuel de Oliveira Lopes Pereira
801 Nasser Djiga
802 Marshall Munetsi
803 Mateus Mané
Name: Full_Name, Length: 804, dtype: object
In [6]:
df_topgoalscorers = df.loc[df["goals_scored"] > 15, "Full_Name"]
print(f" All top goalscorers :\n {df_topgoalscorers}")
All top goalscorers : 67 Ollie Watkins 135 Bryan Mbeumo 146 Yoane Wissa 464 Mohamed Salah 493 Erling Haaland 574 Alexander Isak 624 Chris Wood Name: Full_Name, dtype: object
In [7]:
df.head(10)
Out[7]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Fábio | Ferreira Vieira | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 54 | MID | Fábio Ferreira Vieira |
| 1 | Gabriel | Fernando de Jesus | 3 | 2 | 42 | 600 | 5 | 119.5 | 154.4 | 255.0 | 6 | 152 | 52.6 | 2 | 0 | 4 | 1.0 | 65 | FWD | Gabriel Fernando de Jesus |
| 2 | Gabriel | dos Santos Magalhães | 3 | 2 | 117 | 2363 | 22 | 208.8 | 584.6 | 287.0 | 9 | 459 | 108.2 | 10 | 0 | 4 | 12.2 | 61 | DEF | Gabriel dos Santos Magalhães |
| 3 | Kai | Havertz | 9 | 3 | 97 | 1872 | 20 | 269.0 | 467.6 | 711.0 | 14 | 343 | 144.8 | 7 | 0 | 5 | 6.2 | 77 | FWD | Kai Havertz |
| 4 | Karl | Hein | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 40 | GK | Karl Hein |
| 5 | Jurriën | Timber | 1 | 3 | 114 | 2415 | 21 | 296.8 | 393.0 | 254.0 | 9 | 426 | 94.2 | 11 | 0 | 7 | 4.3 | 55 | DEF | Jurriën Timber |
| 6 | Jorge Luiz | Frello Filho | 0 | 0 | 20 | 701 | 7 | 88.8 | 44.8 | 8.0 | 0 | 112 | 14.2 | 2 | 0 | 5 | 0.2 | 46 | MID | Jorge Luiz Frello Filho |
| 7 | Jakub | Kiwior | 1 | 0 | 43 | 1117 | 15 | 91.3 | 267.2 | 32.0 | 2 | 178 | 39.2 | 3 | 0 | 1 | 3.8 | 49 | DEF | Jakub Kiwior |
| 8 | Gabriel | Martinelli Silva | 8 | 4 | 125 | 2284 | 25 | 587.8 | 559.8 | 912.0 | 6 | 422 | 206.1 | 10 | 0 | 1 | 3.1 | 65 | MID | Gabriel Martinelli Silva |
| 9 | Ethan | Nwaneri | 4 | 2 | 66 | 882 | 4 | 282.8 | 293.2 | 255.0 | 4 | 251 | 83.0 | 4 | 0 | 1 | 1.5 | 43 | MID | Ethan Nwaneri |
In [8]:
df_topbonus= df.nlargest(10, 'bonus')
df_topbonus
Out[8]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 464 | Mohamed | Salah | 29 | 18 | 344 | 3374 | 40 | 1199.2 | 1577.0 | 1985.0 | 55 | 1133 | 476.0 | 15 | 0 | 1 | 66.3 | 136 | MID | Mohamed Salah |
| 624 | Chris | Wood | 20 | 3 | 200 | 2958 | 37 | 320.0 | 838.4 | 900.0 | 41 | 744 | 205.6 | 15 | 0 | 1 | 30.8 | 72 | FWD | Chris Wood |
| 767 | Matheus | Santos Carneiro Da Cunha | 15 | 7 | 178 | 2596 | 54 | 847.8 | 936.2 | 940.0 | 41 | 838 | 272.2 | 7 | 0 | 4 | 13.6 | 70 | FWD | Matheus Santos Carneiro Da Cunha |
| 574 | Alexander | Isak | 23 | 6 | 211 | 2758 | 41 | 573.1 | 1043.0 | 1320.0 | 36 | 848 | 292.7 | 12 | 0 | 1 | 49.4 | 94 | FWD | Alexander Isak |
| 242 | Cole | Palmer | 15 | 10 | 214 | 3193 | 41 | 1259.2 | 1068.2 | 1052.0 | 35 | 952 | 338.0 | 10 | 0 | 7 | 34.3 | 105 | MID | Cole Palmer |
| 67 | Ollie | Watkins | 16 | 8 | 186 | 2593 | 40 | 345.2 | 766.0 | 1148.0 | 32 | 631 | 223.8 | 10 | 0 | 2 | 29.0 | 92 | FWD | Ollie Watkins |
| 146 | Yoane | Wissa | 18 | 6 | 185 | 2921 | 45 | 400.0 | 849.4 | 1181.0 | 32 | 739 | 242.4 | 9 | 0 | 5 | 24.2 | 69 | FWD | Yoane Wissa |
| 135 | Bryan | Mbeumo | 20 | 9 | 236 | 3415 | 55 | 1107.5 | 1236.8 | 1060.0 | 29 | 948 | 340.8 | 9 | 0 | 3 | 46.7 | 83 | MID | Bryan Mbeumo |
| 523 | Bruno | Borges Fernandes | 8 | 12 | 174 | 3017 | 47 | 1407.7 | 1017.8 | 587.0 | 28 | 810 | 300.8 | 10 | 2 | 3 | 13.7 | 84 | MID | Bruno Borges Fernandes |
| 277 | Jean-Philippe | Mateta | 14 | 2 | 150 | 2642 | 37 | 405.4 | 671.6 | 800.0 | 27 | 576 | 187.3 | 9 | 0 | 2 | 8.3 | 75 | FWD | Jean-Philippe Mateta |
In [9]:
df_highest_selected = df[df["selected_by_percent"] > 20]
df_highest_selected
Out[9]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | David | Raya Martin | 0 | 0 | 142 | 3420 | 34 | 10.7 | 755.4 | 0.0 | 10 | 555 | 76.7 | 13 | 0 | 3 | 32.6 | 56 | GK | David Raya Martin |
| 13 | Bukayo | Saka | 6 | 11 | 127 | 1724 | 21 | 842.8 | 606.0 | 830.0 | 18 | 508 | 227.9 | 7 | 0 | 3 | 20.2 | 104 | MID | Bukayo Saka |
| 14 | William | Saliba | 2 | 0 | 130 | 3039 | 29 | 150.1 | 672.6 | 116.0 | 14 | 564 | 94.2 | 12 | 1 | 2 | 25.8 | 64 | DEF | William Saliba |
| 63 | Morgan | Rogers | 8 | 11 | 161 | 3115 | 40 | 722.7 | 730.6 | 689.0 | 15 | 534 | 214.2 | 10 | 0 | 10 | 25.8 | 58 | MID | Morgan Rogers |
| 67 | Ollie | Watkins | 16 | 8 | 186 | 2593 | 40 | 345.2 | 766.0 | 1148.0 | 32 | 631 | 223.8 | 10 | 0 | 2 | 29.0 | 92 | FWD | Ollie Watkins |
| 135 | Bryan | Mbeumo | 20 | 9 | 236 | 3415 | 55 | 1107.5 | 1236.8 | 1060.0 | 29 | 948 | 340.8 | 9 | 0 | 3 | 46.7 | 83 | MID | Bryan Mbeumo |
| 146 | Yoane | Wissa | 18 | 6 | 185 | 2921 | 45 | 400.0 | 849.4 | 1181.0 | 32 | 739 | 242.4 | 9 | 0 | 5 | 24.2 | 69 | FWD | Yoane Wissa |
| 242 | Cole | Palmer | 15 | 10 | 214 | 3193 | 41 | 1259.2 | 1068.2 | 1052.0 | 35 | 952 | 338.0 | 10 | 0 | 7 | 34.3 | 105 | MID | Cole Palmer |
| 464 | Mohamed | Salah | 29 | 18 | 344 | 3374 | 40 | 1199.2 | 1577.0 | 1985.0 | 55 | 1133 | 476.0 | 15 | 0 | 1 | 66.3 | 136 | MID | Mohamed Salah |
| 474 | Virgil | van Dijk | 3 | 1 | 143 | 3330 | 38 | 206.6 | 932.6 | 299.0 | 8 | 631 | 143.7 | 14 | 0 | 5 | 25.6 | 67 | DEF | Virgil van Dijk |
| 492 | Joško | Gvardiol | 5 | 0 | 153 | 3278 | 41 | 501.7 | 847.0 | 497.0 | 15 | 623 | 185.0 | 13 | 0 | 2 | 38.6 | 65 | DEF | Joško Gvardiol |
| 493 | Erling | Haaland | 22 | 3 | 181 | 2736 | 38 | 359.4 | 946.0 | 1511.0 | 26 | 795 | 281.8 | 10 | 0 | 2 | 24.0 | 149 | FWD | Erling Haaland |
| 517 | Pep | Guardiola | 0 | 0 | 113 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 29.2 | 15 | AM | Pep Guardiola |
| 518 | Omar | Marmoush | 7 | 1 | 73 | 1174 | 12 | 246.7 | 318.0 | 410.0 | 12 | 326 | 97.3 | 8 | 0 | 0 | 22.6 | 74 | FWD | Omar Marmoush |
| 574 | Alexander | Isak | 23 | 6 | 211 | 2758 | 41 | 573.1 | 1043.0 | 1320.0 | 36 | 848 | 292.7 | 12 | 0 | 1 | 49.4 | 94 | FWD | Alexander Isak |
| 624 | Chris | Wood | 20 | 3 | 200 | 2958 | 37 | 320.0 | 838.4 | 900.0 | 41 | 744 | 205.6 | 15 | 0 | 1 | 30.8 | 72 | FWD | Chris Wood |
In [10]:
[
method_name
for method_name in dir(df.plot)
if not method_name.startswith("_")
]
Out[10]:
['area', 'bar', 'barh', 'box', 'density', 'hexbin', 'hist', 'kde', 'line', 'pie', 'scatter']
In [11]:
df_2 = df[['total_points', 'minutes', 'Full_Name', 'threat', 'element_type']]
In [12]:
df.columns
Out[12]:
Index(['first_name', 'second_name', 'goals_scored', 'assists', 'total_points',
'minutes', 'goals_conceded', 'creativity', 'influence', 'threat',
'bonus', 'bps', 'ict_index', 'clean_sheets', 'red_cards',
'yellow_cards', 'selected_by_percent', 'now_cost', 'element_type',
'Full_Name'],
dtype='object')
In [13]:
df_2['pscore'] = df_2['minutes']/df_2['total_points']
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/2037718134.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_2['pscore'] = df_2['minutes']/df_2['total_points']
fig, axs = plt.subplots(figsize=(12, 4)) Create an empty Matplotlib Figure and Axes¶
air_quality.plot.area(ax=axs) Use pandas to put the area plot on the prepared Figure/Axes¶
axs.set_ylabel("NO$_2$ concentration") Do any Matplotlib customization you like¶
fig.savefig("no2_concentrations.png") Save the Figure/Axes using the existing Matplotlib method.¶
plt.show() Display the plot¶
In [14]:
df_2
Out[14]:
| total_points | minutes | Full_Name | threat | element_type | pscore | |
|---|---|---|---|---|---|---|
| 0 | 0 | 0 | Fábio Ferreira Vieira | 0.0 | MID | NaN |
| 1 | 42 | 600 | Gabriel Fernando de Jesus | 255.0 | FWD | 14.285714 |
| 2 | 117 | 2363 | Gabriel dos Santos Magalhães | 287.0 | DEF | 20.196581 |
| 3 | 97 | 1872 | Kai Havertz | 711.0 | FWD | 19.298969 |
| 4 | 0 | 0 | Karl Hein | 0.0 | GK | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| 799 | 45 | 1410 | Emmanuel Agbadou | 67.0 | DEF | 31.333333 |
| 800 | 111 | 0 | Vítor Manuel de Oliveira Lopes Pereira | 0.0 | AM | 0.000000 |
| 801 | 4 | 65 | Nasser Djiga | 0.0 | DEF | 16.250000 |
| 802 | 50 | 1077 | Marshall Munetsi | 212.0 | MID | 21.540000 |
| 803 | 1 | 1 | Mateus Mané | 0.0 | FWD | 1.000000 |
804 rows × 6 columns
In [15]:
df_p = df_2[df_2['pscore'].notna()]
In [16]:
df_p
Out[16]:
| total_points | minutes | Full_Name | threat | element_type | pscore | |
|---|---|---|---|---|---|---|
| 1 | 42 | 600 | Gabriel Fernando de Jesus | 255.0 | FWD | 14.285714 |
| 2 | 117 | 2363 | Gabriel dos Santos Magalhães | 287.0 | DEF | 20.196581 |
| 3 | 97 | 1872 | Kai Havertz | 711.0 | FWD | 19.298969 |
| 5 | 114 | 2415 | Jurriën Timber | 254.0 | DEF | 21.184211 |
| 6 | 20 | 701 | Jorge Luiz Frello Filho | 8.0 | MID | 35.050000 |
| ... | ... | ... | ... | ... | ... | ... |
| 799 | 45 | 1410 | Emmanuel Agbadou | 67.0 | DEF | 31.333333 |
| 800 | 111 | 0 | Vítor Manuel de Oliveira Lopes Pereira | 0.0 | AM | 0.000000 |
| 801 | 4 | 65 | Nasser Djiga | 0.0 | DEF | 16.250000 |
| 802 | 50 | 1077 | Marshall Munetsi | 212.0 | MID | 21.540000 |
| 803 | 1 | 1 | Mateus Mané | 0.0 | FWD | 1.000000 |
582 rows × 6 columns
In [17]:
sbn.scatterplot(data = df_p, x = 'minutes', y ='pscore', hue = 'element_type', size ='threat')
plt.title("Scatter Plot of minutes played and pscore for players")
plt.show()
In [18]:
plt.figure(figsize=(14,8))
df_p['element_type'] = df_p['element_type'].astype('category').cat.codes
plt.scatter(df_p['minutes'], df_p['total_points'], s = df_p['pscore'], c = df_p['element_type'], cmap = 'viridis', alpha =0.7)
plt.colorbar(label = 'Element Type')
plt.ylabel("Total points earned")
plt.xlabel("Minutes Played")
plt.title("Position Based Threat & Score")
plt.show()
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/876716628.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_p['element_type'] = df_p['element_type'].astype('category').cat.codes
/Users/anair/.pyenv/versions/3.10.8/lib/python3.10/site-packages/matplotlib/collections.py:996: RuntimeWarning: invalid value encountered in sqrt
scale = np.sqrt(self._sizes) * dpi / 72.0 * self._factor
In [19]:
fig, axs = plt.subplots(1,2, figsize =(16,6))
## First Plot
axs[0].scatter(df_p['minutes'], df_p['total_points'], s = df_p['pscore'], c = df_p['element_type'], cmap = 'viridis', alpha =0.7)
axs[0].set_ylabel("Total points earned")
axs[0].set_xlabel("Minutes Played")
axs[0].set_title("Position Based Threat & Score")
fig.colorbar(axs[0].collections[0], ax = axs[0], label ='element Type')
## Second Plot
df_top = df_p.sort_values(by='threat', ascending=False).head(20)
axs[1].barh(df_top['Full_Name'], df_top['threat'], color='skyblue')
axs[1].set_title("Players and threat score")
axs[1].set_xlabel("Top Players")
axs[1].set_label("Threat Score")
axs[1].invert_yaxis()
plt.tight_layout()
plt.show()
In [20]:
df_p
Out[20]:
| total_points | minutes | Full_Name | threat | element_type | pscore | |
|---|---|---|---|---|---|---|
| 1 | 42 | 600 | Gabriel Fernando de Jesus | 255.0 | 2 | 14.285714 |
| 2 | 117 | 2363 | Gabriel dos Santos Magalhães | 287.0 | 1 | 20.196581 |
| 3 | 97 | 1872 | Kai Havertz | 711.0 | 2 | 19.298969 |
| 5 | 114 | 2415 | Jurriën Timber | 254.0 | 1 | 21.184211 |
| 6 | 20 | 701 | Jorge Luiz Frello Filho | 8.0 | 4 | 35.050000 |
| ... | ... | ... | ... | ... | ... | ... |
| 799 | 45 | 1410 | Emmanuel Agbadou | 67.0 | 1 | 31.333333 |
| 800 | 111 | 0 | Vítor Manuel de Oliveira Lopes Pereira | 0.0 | 0 | 0.000000 |
| 801 | 4 | 65 | Nasser Djiga | 0.0 | 1 | 16.250000 |
| 802 | 50 | 1077 | Marshall Munetsi | 212.0 | 4 | 21.540000 |
| 803 | 1 | 1 | Mateus Mané | 0.0 | 2 | 1.000000 |
582 rows × 6 columns
In [21]:
df
Out[21]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Fábio | Ferreira Vieira | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 54 | MID | Fábio Ferreira Vieira |
| 1 | Gabriel | Fernando de Jesus | 3 | 2 | 42 | 600 | 5 | 119.5 | 154.4 | 255.0 | 6 | 152 | 52.6 | 2 | 0 | 4 | 1.0 | 65 | FWD | Gabriel Fernando de Jesus |
| 2 | Gabriel | dos Santos Magalhães | 3 | 2 | 117 | 2363 | 22 | 208.8 | 584.6 | 287.0 | 9 | 459 | 108.2 | 10 | 0 | 4 | 12.2 | 61 | DEF | Gabriel dos Santos Magalhães |
| 3 | Kai | Havertz | 9 | 3 | 97 | 1872 | 20 | 269.0 | 467.6 | 711.0 | 14 | 343 | 144.8 | 7 | 0 | 5 | 6.2 | 77 | FWD | Kai Havertz |
| 4 | Karl | Hein | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 40 | GK | Karl Hein |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 799 | Emmanuel | Agbadou | 1 | 0 | 45 | 1410 | 22 | 57.3 | 395.2 | 67.0 | 1 | 223 | 52.0 | 4 | 0 | 3 | 0.4 | 40 | DEF | Emmanuel Agbadou |
| 800 | Vítor Manuel | de Oliveira Lopes Pereira | 0 | 0 | 111 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 2.3 | 8 | AM | Vítor Manuel de Oliveira Lopes Pereira |
| 801 | Nasser | Djiga | 0 | 0 | 4 | 65 | 2 | 2.1 | 5.6 | 0.0 | 0 | 11 | 0.8 | 0 | 0 | 0 | 0.0 | 45 | DEF | Nasser Djiga |
| 802 | Marshall | Munetsi | 2 | 2 | 50 | 1077 | 11 | 132.9 | 176.8 | 212.0 | 4 | 134 | 52.3 | 5 | 0 | 0 | 0.1 | 50 | MID | Marshall Munetsi |
| 803 | Mateus | Mané | 0 | 0 | 1 | 1 | 0 | 0.3 | 0.0 | 0.0 | 0 | 3 | 0.0 | 0 | 0 | 0 | 0.1 | 45 | FWD | Mateus Mané |
804 rows × 20 columns
Top Performers by Position¶
In [22]:
df_p.sample(5)
Out[22]:
| total_points | minutes | Full_Name | threat | element_type | pscore | |
|---|---|---|---|---|---|---|
| 779 | 87 | 2975 | João Victor Gomes da Silva | 231.0 | 4 | 34.195402 |
| 457 | 127 | 1921 | Cody Gakpo | 686.0 | 2 | 15.125984 |
| 334 | 27 | 483 | Reiss Nelson | 195.0 | 4 | 17.888889 |
| 583 | 34 | 783 | Sean Longstaff | 76.0 | 4 | 23.029412 |
| 317 | 105 | 2922 | James Tarkowski | 217.0 | 1 | 27.828571 |
In [23]:
high_perf = df_p.groupby('element_type').apply(lambda x: x.nlargest(3, 'total_points'))[['Full_Name', 'total_points', 'minutes']]
high_perf
/var/folders/lz/5bcqk50d4p1_59b0v5spmshr0000gq/T/ipykernel_4016/724788543.py:1: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
high_perf = df_p.groupby('element_type').apply(lambda x: x.nlargest(3, 'total_points'))[['Full_Name', 'total_points', 'minutes']]
Out[23]:
| Full_Name | total_points | minutes | ||
|---|---|---|---|---|
| element_type | ||||
| 0 | 330 | David Moyes | 137 | 0 |
| 297 | Oliver Glasner | 128 | 0 | |
| 208 | Fabian Hürzeler | 119 | 0 | |
| 1 | 492 | Joško Gvardiol | 153 | 3278 |
| 448 | Trent Alexander-Arnold | 148 | 2362 | |
| 627 | Nikola Milenković | 145 | 3330 | |
| 2 | 574 | Alexander Isak | 211 | 2758 |
| 624 | Chris Wood | 200 | 2958 | |
| 67 | Ollie Watkins | 186 | 2593 | |
| 3 | 316 | Jordan Pickford | 158 | 3420 |
| 621 | Matz Sels | 150 | 3420 | |
| 11 | David Raya Martin | 142 | 3420 | |
| 4 | 464 | Mohamed Salah | 344 | 3374 |
| 135 | Bryan Mbeumo | 236 | 3415 | |
| 242 | Cole Palmer | 214 | 3193 |
Efficiency Metric¶
In [24]:
df_p
Out[24]:
| total_points | minutes | Full_Name | threat | element_type | pscore | |
|---|---|---|---|---|---|---|
| 1 | 42 | 600 | Gabriel Fernando de Jesus | 255.0 | 2 | 14.285714 |
| 2 | 117 | 2363 | Gabriel dos Santos Magalhães | 287.0 | 1 | 20.196581 |
| 3 | 97 | 1872 | Kai Havertz | 711.0 | 2 | 19.298969 |
| 5 | 114 | 2415 | Jurriën Timber | 254.0 | 1 | 21.184211 |
| 6 | 20 | 701 | Jorge Luiz Frello Filho | 8.0 | 4 | 35.050000 |
| ... | ... | ... | ... | ... | ... | ... |
| 799 | 45 | 1410 | Emmanuel Agbadou | 67.0 | 1 | 31.333333 |
| 800 | 111 | 0 | Vítor Manuel de Oliveira Lopes Pereira | 0.0 | 0 | 0.000000 |
| 801 | 4 | 65 | Nasser Djiga | 0.0 | 1 | 16.250000 |
| 802 | 50 | 1077 | Marshall Munetsi | 212.0 | 4 | 21.540000 |
| 803 | 1 | 1 | Mateus Mané | 0.0 | 2 | 1.000000 |
582 rows × 6 columns
In [25]:
eff = df_p.query('minutes>=750').nsmallest(10,'pscore')[['Full_Name', 'threat']]
eff
Out[25]:
| Full_Name | threat | |
|---|---|---|
| 464 | Mohamed Salah | 1985.0 |
| 349 | Rodrigo Muniz Carvalho | 508.0 |
| 574 | Alexander Isak | 1320.0 |
| 463 | Luis Díaz | 946.0 |
| 9 | Ethan Nwaneri | 255.0 |
| 566 | Harvey Barnes | 701.0 |
| 694 | James Maddison | 495.0 |
| 13 | Bukayo Saka | 830.0 |
| 453 | Diogo Teixeira da Silva | 537.0 |
| 67 | Ollie Watkins | 1148.0 |
In [26]:
df.sample(5)
Out[26]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | bonus | bps | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 38 | Ross | Barkley | 3 | 1 | 44 | 567 | 10 | 128.1 | 215.4 | 206.0 | 5 | 166 | 54.8 | 1 | 0 | 3 | 0.2 | 52 | MID | Ross Barkley |
| 361 | Martial | Godo | 0 | 0 | 2 | 21 | 0 | 12.0 | 3.4 | 14.0 | 0 | 4 | 3.0 | 0 | 0 | 0 | 0.0 | 44 | MID | Martial Godo |
| 461 | Caoimhin | Kelleher | 0 | 0 | 45 | 900 | 12 | 0.0 | 224.2 | 0.0 | 1 | 156 | 22.4 | 4 | 0 | 0 | 1.2 | 39 | GK | Caoimhin Kelleher |
| 356 | Carlos Vinícius | Alves Morais | 0 | 0 | 3 | 13 | 1 | 0.0 | 4.0 | 19.0 | 0 | 14 | 2.3 | 0 | 0 | 0 | 0.3 | 50 | FWD | Carlos Vinícius Alves Morais |
| 683 | Bryan | Gil Salvatierra | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0 | 0 | 0.0 | 0 | 0 | 0 | 0.0 | 50 | MID | Bryan Gil Salvatierra |
In [27]:
df['gc'] = df['goals_scored'] + df['assists']
df['gc_per_game'] = (df['gc'] / df['minutes']) * 90
df.query('minutes > 0').sort_values('gc_per_game', ascending=False).head(10)
Out[27]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | ... | ict_index | clean_sheets | red_cards | yellow_cards | selected_by_percent | now_cost | element_type | Full_Name | gc | gc_per_game | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 700 | Dane | Scarlett | 0 | 1 | 6 | 31 | 1 | 11.0 | 24.4 | 17.0 | ... | 5.2 | 0 | 0 | 0 | 0.7 | 44 | FWD | Dane Scarlett | 1 | 2.903226 |
| 296 | Romain | Esse | 1 | 2 | 22 | 138 | 3 | 38.4 | 74.8 | 51.0 | ... | 16.4 | 0 | 0 | 0 | 0.1 | 47 | MID | Romain Esse | 3 | 1.956522 |
| 278 | Matheus | França de Oliveira | 1 | 0 | 9 | 52 | 0 | 1.2 | 48.4 | 66.0 | ... | 11.5 | 0 | 0 | 0 | 0.2 | 44 | MID | Matheus França de Oliveira | 1 | 1.730769 |
| 737 | Danny | Ings | 1 | 3 | 32 | 270 | 4 | 76.5 | 97.0 | 159.0 | ... | 33.4 | 0 | 0 | 0 | 1.2 | 48 | FWD | Danny Ings | 4 | 1.333333 |
| 464 | Mohamed | Salah | 29 | 18 | 344 | 3374 | 40 | 1199.2 | 1577.0 | 1985.0 | ... | 476.0 | 15 | 0 | 1 | 66.3 | 136 | MID | Mohamed Salah | 47 | 1.253705 |
| 254 | Tyrique | George | 1 | 1 | 17 | 177 | 0 | 72.7 | 67.4 | 32.0 | ... | 17.1 | 1 | 0 | 1 | 0.0 | 45 | MID | Tyrique George | 2 | 1.016949 |
| 47 | Jhon | Durán | 7 | 0 | 53 | 622 | 14 | 68.9 | 273.8 | 334.0 | ... | 67.5 | 2 | 1 | 3 | 1.6 | 57 | FWD | Jhon Durán | 7 | 1.012862 |
| 574 | Alexander | Isak | 23 | 6 | 211 | 2758 | 41 | 573.1 | 1043.0 | 1320.0 | ... | 292.7 | 12 | 0 | 1 | 49.4 | 94 | FWD | Alexander Isak | 29 | 0.946338 |
| 358 | Ryan | Sessegnon | 4 | 2 | 57 | 574 | 10 | 158.8 | 284.0 | 227.0 | ... | 67.1 | 1 | 0 | 1 | 2.6 | 42 | DEF | Ryan Sessegnon | 6 | 0.940767 |
| 77 | Donyell | Malen | 3 | 0 | 34 | 296 | 6 | 42.7 | 138.2 | 208.0 | ... | 38.5 | 0 | 0 | 0 | 0.2 | 53 | MID | Donyell Malen | 3 | 0.912162 |
10 rows × 22 columns
In [28]:
no_rules = df.query('yellow_cards > 5 or red_cards > 1').sort_values('yellow_cards', ascending=False)[['Full_Name', 'yellow_cards','selected_by_percent', 'influence', 'gc_per_game']]
no_rules.head(15)
Out[28]:
| Full_Name | yellow_cards | selected_by_percent | influence | gc_per_game | |
|---|---|---|---|---|---|
| 666 | Flynn Downes | 12 | 0.0 | 297.6 | 0.041880 |
| 347 | Saša Lukić | 12 | 0.0 | 335.6 | 0.076661 |
| 375 | Liam Delap | 12 | 3.3 | 513.0 | 0.487051 |
| 274 | Will Hughes | 11 | 0.0 | 335.2 | 0.171429 |
| 569 | Dan Burn | 11 | 12.6 | 736.6 | 0.054054 |
| 549 | Manuel Ugarte | 11 | 0.1 | 313.6 | 0.151941 |
| 219 | Moisés Caicedo Corozo | 11 | 1.0 | 627.6 | 0.134288 |
| 626 | Ryan Yates | 10 | 0.1 | 344.4 | 0.189374 |
| 107 | Dean Huijsen | 10 | 5.0 | 826.4 | 0.185797 |
| 576 | Joelinton Cássio Apolinário de Lira | 10 | 1.1 | 469.8 | 0.300752 |
| 281 | Daniel Muñoz | 10 | 19.3 | 816.6 | 0.278724 |
| 385 | Sam Morsy | 10 | 0.1 | 417.0 | 0.065478 |
| 741 | Lucas Tolentino Coelho de Lima | 10 | 0.9 | 433.2 | 0.189553 |
| 601 | Elliot Anderson | 10 | 0.3 | 638.8 | 0.264123 |
| 63 | Morgan Rogers | 10 | 25.8 | 730.6 | 0.548957 |
ICT Index Breakdown - influence, threat and creativity¶
In [29]:
# Ensure these columns are numeric
cols = ['influence', 'threat', 'creativity']
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
# Now you can safely scale them
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[['norm_influence', 'norm_threat', 'norm_creativity']] = scaler.fit_transform(df[cols])
# Compute the custom ICT score
df['ict_custom'] = (
0.5 * df['norm_influence'] +
0.3 * df['norm_threat'] +
0.2 * df['norm_creativity']
)
# View the top 10
print(df[['first_name', 'second_name', 'ict_custom']].sort_values('ict_custom', ascending=False).head(10))
first_name second_name ict_custom 464 Mohamed Salah 0.970377 135 Bryan Mbeumo 0.709687 242 Cole Palmer 0.676575 574 Alexander Isak 0.611611 523 Bruno Borges Fernandes 0.611417 493 Erling Haaland 0.579361 767 Matheus Santos Carneiro Da Cunha 0.559347 730 Jarrod Bowen 0.554502 99 Antoine Semenyo 0.531014 146 Yoane Wissa 0.504628
In [30]:
cor_matrix = df.corr(numeric_only=True)
cor_matrix['total_points'].sort_values(ascending=False)
Out[30]:
total_points 1.000000 bps 0.930018 ict_custom 0.924956 ict_index 0.911029 norm_influence 0.888418 influence 0.888418 clean_sheets 0.860499 minutes 0.855952 bonus 0.836223 gc 0.812560 norm_threat 0.789423 threat 0.789423 creativity 0.758385 norm_creativity 0.758385 assists 0.744160 goals_conceded 0.741313 goals_scored 0.738368 selected_by_percent 0.681181 yellow_cards 0.566471 gc_per_game 0.444055 now_cost 0.434292 red_cards 0.195832 Name: total_points, dtype: float64
Popularity & Performance¶
In [31]:
high_perf = df['total_points'] > df['total_points'].quantile(0.6)
low_select = df['selected_by_percent'] < df['selected_by_percent'].quantile(0.4)
outliers = df[high_perf & low_select]
In [32]:
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'notebook' # or 'browser', 'iframe', etc.
fig = px.scatter(
outliers,
x = 'selected_by_percent',
y = 'total_points',
hover_data=['Full_Name', 'element_type'],
labels = {'selected_by_percent': '% Selected by Managers',
'total_points': 'Total Fantasy Points'
}
)
fig.update_traces(marker = dict(size=10, color='red'), textposition='top center')
fig.update_layout(height=500)
fig.show()
In [33]:
pip install plotly
Requirement already satisfied: plotly in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (6.2.0) Requirement already satisfied: narwhals>=1.15.1 in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (from plotly) (1.48.1) Requirement already satisfied: packaging in ./.pyenv/versions/3.10.8/lib/python3.10/site-packages (from plotly) (24.0) [notice] A new release of pip is available: 24.3.1 -> 25.1.1 [notice] To update, run: pip install --upgrade pip Note: you may need to restart the kernel to use updated packages.
In [34]:
print(outliers.shape)
(12, 26)
In [35]:
outliers
Out[35]:
| first_name | second_name | goals_scored | assists | total_points | minutes | goals_conceded | creativity | influence | threat | ... | selected_by_percent | now_cost | element_type | Full_Name | gc | gc_per_game | norm_influence | norm_threat | norm_creativity | ict_custom | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 84 | David | Brooks | 2 | 1 | 52 | 941 | 12 | 316.7 | 193.6 | 300.0 | ... | 0.0 | 49 | MID | David Brooks | 3 | 0.286929 | 0.122765 | 0.151134 | 0.224977 | 0.151718 |
| 202 | Matt | O'Riley | 2 | 3 | 50 | 929 | 20 | 322.5 | 211.4 | 219.0 | ... | 0.0 | 55 | MID | Matt O'Riley | 5 | 0.484392 | 0.134052 | 0.110327 | 0.229097 | 0.145944 |
| 274 | Will | Hughes | 0 | 4 | 67 | 2100 | 31 | 462.6 | 335.2 | 90.0 | ... | 0.0 | 49 | MID | Will Hughes | 4 | 0.171429 | 0.212555 | 0.045340 | 0.328621 | 0.185604 |
| 305 | James | Garner | 0 | 1 | 42 | 1590 | 19 | 248.2 | 264.8 | 93.0 | ... | 0.0 | 49 | MID | James Garner | 1 | 0.056604 | 0.167914 | 0.046851 | 0.176316 | 0.133275 |
| 347 | Saša | Lukić | 0 | 2 | 56 | 2348 | 37 | 455.1 | 335.6 | 119.0 | ... | 0.0 | 48 | MID | Saša Lukić | 2 | 0.076661 | 0.212809 | 0.059950 | 0.323293 | 0.189048 |
| 362 | Sander | Berge | 0 | 0 | 53 | 2221 | 35 | 285.7 | 314.4 | 51.0 | ... | 0.0 | 50 | MID | Sander Berge | 0 | 0.000000 | 0.199366 | 0.025693 | 0.202955 | 0.147982 |
| 399 | Jack | Clarke | 0 | 4 | 53 | 1161 | 31 | 341.7 | 230.8 | 151.0 | ... | 0.0 | 54 | MID | Jack Clarke | 4 | 0.310078 | 0.146354 | 0.076071 | 0.242736 | 0.144545 |
| 609 | Nicolás | Domínguez | 0 | 1 | 54 | 1954 | 27 | 247.2 | 410.2 | 237.0 | ... | 0.0 | 48 | MID | Nicolás Domínguez | 1 | 0.046059 | 0.260114 | 0.119395 | 0.175606 | 0.200997 |
| 649 | Kamaldeen | Sulemana | 1 | 2 | 53 | 1394 | 36 | 182.4 | 187.2 | 290.0 | ... | 0.0 | 50 | MID | Kamaldeen Sulemana | 3 | 0.193687 | 0.118706 | 0.146096 | 0.129573 | 0.129097 |
| 666 | Flynn | Downes | 1 | 0 | 48 | 2149 | 49 | 292.2 | 297.6 | 118.0 | ... | 0.0 | 47 | MID | Flynn Downes | 1 | 0.041880 | 0.188713 | 0.059446 | 0.207573 | 0.153705 |
| 753 | Carlos | Soler | 1 | 1 | 47 | 1394 | 25 | 295.8 | 189.0 | 186.0 | ... | 0.0 | 50 | MID | Carlos Soler | 2 | 0.129125 | 0.119848 | 0.093703 | 0.210130 | 0.130061 |
| 794 | André | Trindade da Costa Neto | 0 | 0 | 61 | 2472 | 46 | 285.5 | 383.2 | 30.0 | ... | 0.0 | 50 | MID | André Trindade da Costa Neto | 0 | 0.000000 | 0.242993 | 0.015113 | 0.202813 | 0.166593 |
12 rows × 26 columns
In [ ]: